from urllib.parse import urljoin

from requests_html import HTMLSession



# http://www.xinhuanet.com/

def resolve_url(href, base="http://www.xinhuanet.com/"):
    """Return *href* as an absolute URL, resolved against *base*.

    Absolute URLs are returned unchanged. Relative, root-relative
    (``/path``) and protocol-relative (``//host/path``) references are
    resolved with the standard URL-joining rules, which avoids the doubled
    slashes and malformed hosts that plain string concatenation produces.

    Args:
        href: The raw ``href`` attribute value taken from an anchor.
        base: The URL of the page the anchor was found on.

    Returns:
        The absolute URL as a string.
    """
    # Surrounding whitespace in an href attribute is not part of the URL.
    return urljoin(base, href.strip())


def main():
    """Fetch the Xinhuanet home page and print each navigation category.

    For every matching anchor in the navigation bar, prints the category
    name followed by its absolute URL.

    Raises:
        requests.HTTPError: If the home page responds with an error status.
    """
    print('start...')
    url = "http://www.xinhuanet.com/"

    # The context manager closes the session's connections on exit.
    with HTMLSession() as session:
        # Without a timeout the request can block indefinitely.
        res = session.get(url=url, timeout=30)
        # Fail loudly instead of silently parsing an error page.
        res.raise_for_status()
        # Debugging aid kept from the original script.
        print(res.encoding)

        # Category links (colsItem); the navigation list is located at
        # //*[@id="navBody"]/div/ul[1]
        items = res.html.xpath(
            "//div[@id='navBody']/div/ul/li[@class='item']/a"
        )
        print(items)

        for item in items:
            names = item.xpath("//text()")
            hrefs = item.xpath("//@href")
            if not hrefs:
                # An anchor without an href has no category URL to report.
                continue
            # Category name: first text node, or empty if there is none
            # (e.g. an image-only link).
            item_name = names[0] if names else ""
            # Category URL, made absolute relative to the fetched page.
            item_url = resolve_url(hrefs[0], url)
            print(item_name, item_url)


if __name__ == "__main__":
    main()





